\documentclass[12pt]{article}
\usepackage[margin=10mm]{geometry}
\usepackage{tikz}
\usetikzlibrary{3d}

\begin{document}
\begin{tikzpicture}[
    x={(1cm,0cm)}, y={(0cm,1cm)}, z={(0.5cm,0.5cm)},
    % Shared appearance of every cuboid: thin black outline, white where filled.
    box edge/.style={draw=black, line width=0.5pt},
    box face/.style={box edge, fill=white},
  ]

  % \featurebox{x}{y}{z}{dx}{dy}{dz}{label}
  %
  % Draws one cuboid and prints <label> centred below its front-bottom edge.
  %   x, y, z     coordinates of the front-bottom-left corner, in TikZ's own
  %               (x,y,z) order; z runs along the oblique axis declared above
  %   dx, dy, dz  extents of the cuboid along those three axes
  %   label       text placed midway below the front-bottom edge
  % All numeric arguments are pasted verbatim into coordinate expressions
  % such as (x+dx, y+dy, z+dz), so plain numbers or macros expanding to
  % numbers both work.
  %
  % Paint order matters because later white fills cover earlier strokes:
  %   1. rear/top outline (its fill closes the open path implicitly and so
  %      lays down a white patch as well),
  %   2. the white front face carrying the label,
  %   3. the three edges joining the front face to the rear outline.
  % The definition is local to this tikzpicture.
  \newcommand*{\featurebox}[7]{%
    \draw[box face] (#1+#4,#2,#3+#6) -- (#1+#4,#2+#5,#3+#6)
      -- (#1,#2+#5,#3+#6) -- (#1,#2+#5,#3);
    % `cycle' closes the rectangle with a proper line join at the start corner.
    \draw[box face] (#1,#2,#3) -- (#1+#4,#2,#3) node[midway, below] {#7}
      -- (#1+#4,#2+#5,#3) -- (#1,#2+#5,#3) -- cycle;
    \draw[box edge] (#1+#4,#2,#3) -- (#1+#4,#2,#3+#6);
    \draw[box edge] (#1+#4,#2+#5,#3) -- (#1+#4,#2+#5,#3+#6);
    \draw[box edge] (#1,#2+#5,#3) -- (#1,#2+#5,#3+#6);
  }

  % Left-hand image. The file extension is spelled out on purpose: a .png
  % with the same base name is included further down.
  % NOTE(review): `canvas is zy plane at x=0' is given as a node option without
  % `transform shape'; check the rendered output if the slant looks wrong.
  \node[canvas is zy plane at x=0, fill=white] at (0.5,0)
    {\includegraphics[width=7cm, height=7cm]{001763.jpg}};

  % Chain of cuboids, drawn left to right so that each one is painted over
  % its left neighbour where they overlap. The label is the number printed
  % under each box.
  %            x     y     z    dx   dy   dz   label
  \featurebox{1}  {-4}  {2}  {1} {4}  {4}  {96}
  \featurebox{2}  {-2.8}{3}  {1} {1.8}{1.8}{256}
  \featurebox{3}  {-2.5}{3.5}{1} {1}  {1}  {384}
  \featurebox{4.2}{-2.5}{3.5}{1} {1}  {1}  {384}
  \featurebox{5.4}{-2.5}{3.5}{1} {1}  {1}  {256}
  \featurebox{6.6}{-2.1}{3.6}{1} {0.4}{0.4}{4096}
  \featurebox{7.8}{-2.1}{3.6}{1} {0.4}{0.4}{4096}
  \featurebox{9.0}{-2.1}{3.6}{1} {0.4}{0.4}{21}

  % Large thin slab on the right. Its geometry is kept in macros because the
  % two sloped captions below are anchored to its top edges.
  \newcommand*{\outX}{12.3}
  \newcommand*{\outY}{-5}
  \newcommand*{\outZ}{-0.5}
  \newcommand*{\outDX}{0.5}
  \newcommand*{\outDY}{7}
  \newcommand*{\outDZ}{7}
  \featurebox{\outX}{\outY}{\outZ}{\outDX}{\outDY}{\outDZ}{21}

  % Captions running along the slab's two top edges (front to rear). The
  % edges themselves are already stroked by \featurebox, so these paths only
  % carry the nodes.
  \path (\outX+\outDX,\outY+\outDY,\outZ) -- (\outX+\outDX,\outY+\outDY,\outZ+\outDZ)
    node[midway, below, sloped, font=\Large] {segmentation g.t.};
  \path (\outX,\outY+\outDY,\outZ) -- (\outX,\outY+\outDY,\outZ+\outDZ)
    node[midway, above, sloped, font=\Large] {pixelwise prediction};

  % Right-hand image (node name: gt), painted after the slab.
  \node[canvas is zy plane at x=0, fill=white] (gt) at (15.4,0)
    {\includegraphics[width=7cm, height=7cm]{001763.png}};

  % Two thick arrows drawn last, so they and their white-backed labels sit on
  % top of everything else: A -> B points right, D -> C points left.
  \coordinate (A) at (5,4);
  \coordinate (B) at (12.1,4);
  \coordinate (C) at (4.9,3.5);
  \coordinate (D) at (12,3.5);
  \draw[-latex, line width=3pt] (A) -- (B)
    node[midway, above, fill=white, font=\Large] {forward/inference};
  \draw[-latex, line width=3pt] (D) -- (C)
    node[midway, below, fill=white, font=\Large] {backward/learning};
\end{tikzpicture}

\end{document}